User Data Analytics

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr) 
library(kableExtra) 
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(flextable) 
## 
## Attaching package: 'flextable'
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## The following object is masked from 'package:purrr':
## 
##     compose
library(gmodels) 
library(Hmisc) 
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(statar) 
library(ggpubr) 
## 
## Attaching package: 'ggpubr'
## The following objects are masked from 'package:flextable':
## 
##     border, font, rotate
salad_coupon <- read.csv("/Users/bowenjin/Desktop/Lion's Choice/rfm_trans.csv")
salad_coupon
dim(salad_coupon)
## [1] 25763     6
#response rate of the coupon
CrossTable(salad_coupon$X13....2.Off.Butcher.Block.Sala.13...Redeemed)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  25763 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |     25636 |       127 | 
##           |     0.995 |     0.005 | 
##           |-----------|-----------|
## 
## 
## 
## 

Response rate is 0.5%

# Create the quintiles for R, F, M
salad_coupon1 <- salad_coupon %>% 
  summarise(CardNumber = Card.Number,
            recency = Last.Guest.Activity.Date,
            frequency = X10...Visits.10...Balance,
            monetary = X1...Dollars.Spent.1...Balance,
            coupon = X13....2.Off.Butcher.Block.Sala.13...Redeemed,
            rec_quin = xtile(Last.Guest.Activity.Date, 5),
            freq_quin = xtile(X10...Visits.10...Balance, 5),
            mv_quin = xtile(X1...Dollars.Spent.1...Balance, 5))
salad_coupon1
#check and adjust ranking for R, F, M

#Recency rank
salad_coupon1 %>% group_by(rec_quin) %>% summarise(avg_rec = mean(recency), .groups="drop")
#Frequency rank adjust
salad_coupon1 %>% group_by(freq_quin) %>% summarise(avg_freq = mean(frequency), .groups="drop")
salad_coupon1$freq_quin <- max(salad_coupon1$freq_quin) + 1 - salad_coupon1$freq_quin
salad_coupon1 %>% group_by(freq_quin) %>% summarise(avg_freq = mean(frequency), .groups="drop")
#Monetary rank adjust
salad_coupon1 %>% group_by(mv_quin) %>% summarise(avg_mv = mean(monetary), .groups="drop")
salad_coupon1$mv_quin <- max(salad_coupon1$mv_quin) + 1 - salad_coupon1$mv_quin
salad_coupon1 %>% group_by(mv_quin) %>% summarise(avg_mv = mean(monetary), .groups="drop")
#create rfm index
salad_coupon1 <- salad_coupon1 %>% 
  mutate(rfmindex_iq = 100*rec_quin + 10*freq_quin + mv_quin)

salad_coupon1
#response rate in each RFM group
avg_resp_rate_rfm <- salad_coupon1 %>% 
  group_by(rfmindex_iq) %>% 
  summarise(resp_rate_rfm_iq=mean(coupon), .groups="drop") %>% 
  arrange(desc(resp_rate_rfm_iq))
avg_resp_rate_rfm
bar_avg_resp_rate_rfm <- 
  ggplot(data=avg_resp_rate_rfm, 
         aes(x = as.factor(rfmindex_iq), y = resp_rate_rfm_iq)) + 
  labs(x="RFM Cells", 
       y="Average Response Rate", 
       title = "Response Rates by Independent RFM Cells") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_bar(stat="identity") + 
  scale_x_discrete(breaks = seq(111, 555, by = 5))
bar_avg_resp_rate_rfm

#response rate for every member
salad_coupon1 <- salad_coupon1 %>% 
  group_by(rfmindex_iq) %>% 
  mutate(resp_rate_by_rfm_iq = mean(coupon)) %>% ungroup()

salad_coupon1